Integrative analysis reveals therapeutic potential of pyrviniym pamoate in Merkel cell carcinoma


MCC signature genes (MCC1000) reversal analysis in LINCS L1000 dataset

back to home

library(cmapR)
library(ggplot2)
library(ggrepel)
library(dplyr)
library(circlize)
library(RColorBrewer)
library(migest)

Importing data

#load L1000 files===============
DIR<-c("/xdisk/mpadi/jiawenyang/data/merkel_cell/pp_in_mcc_paper/l1000/")
FILES<-list.files(path = paste0(DIR, "WNT_inhibitor"), pattern = "*_info.txt", full.names = T, recursive = T)
#FILES<-list.files(path = paste0(DIR, "WNT_related_conditions"), pattern = "*_info.txt", full.names = T, recursive = T)
gene_info<-read.table(file = paste0(DIR, "GSE92742_Broad_LINCS_gene_info.txt"), sep = "\t", quote = "", header = T)
cell_info<-read.delim2(file = paste0(DIR, "GSE92742_Broad_LINCS_cell_info.txt"), sep = "\t", quote = "", header = T)
colnames<-read.table(file = paste0(DIR, "colnames.txt"))
#load gse39612 data set ====================
gse39612_renormalized_edat<-readRDS(file = "/xdisk/mpadi/jiawenyang/data/merkel_cell/pp_in_mcc_paper/gse39612/renormalized_samples_edat_no_celllines.RDS")

#load NE marker gene list ================
neuroendocrine_markers<-c("ENO2", "NEFM", "NEFH", "NMB", "HES6", "SOX2", "ATOH1", "CHGA")

Performing Pearson Correlation analysis with GSE39612 data: correlat the expression value of all genes with NE marker genes

gse39612_cor<-readRDS(file = "/xdisk/mpadi/jiawenyang/data/merkel_cell/pp_in_mcc_paper/gse39612/gse39612_cor.RDS")
gse39612_NE_cor<-gse39612_cor[neuroendocrine_markers,]
NE_score<-apply(t(gse39612_NE_cor), 1, sum)
NE_score_sorted<-NE_score[order(NE_score, decreasing = T)]
MCC1000<-rbind(data.frame(MCC_score = head(NE_score_sorted, 500)), data.frame(MCC_score = tail(NE_score_sorted, 500)))

Overlapping the MCC1000 genes with perturbed top 1000 genes in LINCS L1000 data under different perturbagens

n=500
gse39612_high_NE_score_genes<-names(NE_score_sorted[1:n])
gse39612_low_NE_score_genes<-names(rev(NE_score_sorted)[1:n])

annotate_genes<-list()
for (i in 1:length(FILES)){
  RDS_FILES<-list.files(path = paste0(DIR, "WNT_inhibitor"), pattern = "*.RDS", full.names = T, recursive = T)
  level4_all<-readRDS(RDS_FILES[i])
  drug_lable<-strsplit(strsplit(RDS_FILES[i], "/")[[1]][10], "_")[[1]][1]

  gene_info$pr_gene_id <- as.character(gene_info$pr_gene_id)
  level4_matrix<-mat(level4_all)
  level4_zscore<-as.data.frame(level4_matrix)
  level4_zscore$pr_gene_id<-rownames(level4_zscore)
  level4_zscore<-left_join(level4_zscore, gene_info, by = "pr_gene_id")
  rownames(level4_zscore)<-level4_zscore$pr_gene_symbol
  level4_zscore_analysis<-level4_zscore[, 1:(ncol(level4_zscore)-5)]
  level4_zscore_analysis$mean<-apply(level4_zscore_analysis, 1, mean)
  level4_zscore_analysis$gene<-rownames(level4_zscore_analysis)

  #Organize l1000 level4 sample info data ============================
  samples_info<-read.table(file = FILES[i] ,sep = "\t", header = F)
  colnames(samples_info)<-colnames
  group_all=samples_info[,c("sample_id" ,"pert_itime", "pert_idose", "cell_iname" , "project_code")]
  rownames(group_all)<-samples_info$sample_id
  group_all<-group_all[colnames(level4_zscore),]
  group_all<-na.omit(group_all)

  cell_info_df<-cell_info[,c("base_cell_id", "primary_site", "subtype", "original_growth_pattern")]
  colnames(cell_info_df)<-c("cell_iname","primary_site", "subtype", "original_growth_pattern")

  group_all_new<-left_join(group_all, cell_info_df, by = "cell_iname", keep = F)
  group_all<-group_all_new[!duplicated(group_all_new$sample_id),]
  group_all$dosage<-unlist(lapply(group_all$pert_idose, function(x) strsplit(x, " ")[[1]][1]))
  group_all<-group_all[group_all$dosage <= 10,]
  
  L1000_zscored_fc<-level4_zscore_analysis
  L1000_sample_info<-group_all

  all_fc_edat<-L1000_zscored_fc #total 12328 genes
  df<-all_fc_edat

  L1000_top_downregulated_genes_all<-all_fc_edat[order(all_fc_edat$mean, decreasing = F),][1:n,]
  L1000_top_upregulated_genes_all<-all_fc_edat[order(all_fc_edat$mean, decreasing = T),][1:n,]
  L1000_gene_set<-c(rownames(L1000_top_downregulated_genes_all), rownames(L1000_top_upregulated_genes_all))
  L1000_null_gene_set<-rownames(df)[!rownames(df) %in% L1000_gene_set]
  
  MCC1000_gene_set<-c(gse39612_high_NE_score_genes, gse39612_low_NE_score_genes)
  MCC1000_null_gene_set<-names(NE_score)[!names(NE_score) %in% MCC1000_gene_set]
  
  annotate_down<-intersect(gse39612_high_NE_score_genes, rownames(L1000_top_downregulated_genes_all))
  annotate_up<-intersect(gse39612_low_NE_score_genes, rownames(L1000_top_upregulated_genes_all))
  
  annotate_gene_set<-c(annotate_down, annotate_down)
  annotate_genes[[drug_lable]]<-annotate_gene_set
  print(annotate_gene_set)
  
  annotate_down_df<-all_fc_edat[annotate_down,]
  annotate_up_df<-all_fc_edat[annotate_up,]
  
  # Perform fisher's exact =========
   dat<- data.frame(
  "MCC1000_yes" = c(length(intersect(MCC1000_gene_set, L1000_gene_set)), length(intersect(MCC1000_gene_set, L1000_null_gene_set))),
  "MCC1000_no" = c(length(intersect(MCC1000_null_gene_set, L1000_gene_set)), length(intersect(MCC1000_null_gene_set, L1000_null_gene_set))),
  row.names = c("l1000_yes", "l1000_no"),
  stringsAsFactors = FALSE)
  print(drug_lable)
  test <- fisher.test(dat)
  print(test)

  #plot for each drug ==========
  df<-na.omit(df)

  p1 = ggplot(data = df, 
              aes(x=reorder(gene, mean), y=mean),
              label = gene)+
    geom_point(color = "grey75", 
               size = 5
    )+
    geom_point(data = annotate_down_df , # New layer containing data subset il_genes       
               size = 3,
               shape = 21,
               fill = "firebrick",
               colour = "black")+
    geom_text_repel(data = annotate_down_df, aes(label = gene), size = 6, fontface = "bold", colour = "black", force = 3, max.overlaps = 70) +
    geom_point(data = annotate_up_df , # New layer containing data subset il_genes       
               size = 3,
               shape = 21,
               fill = "blue",
               colour = "black")+
    geom_text_repel(data = annotate_up_df, aes(label = gene), size = 6, fontface = "bold", colour = "black", force = 3, max.overlaps = 70) +
    ylab(paste0("Mean of Z-scored fold change of all cell lines"))+
    xlab("Genes")+
    scale_x_discrete(expand = c(0.15,0))+
    lims(y = c(-7.5, 5))+
    ggtitle(paste0("L1000 top dysregulated genes overlapping with GSE39612 MCC1000 genes \n -", drug_lable))+
    theme(axis.text.x = element_blank(),
          plot.title = element_text(size = 30, face = "bold"),
          panel.background = element_blank(),
          axis.line.y = element_line(),
          #plot.margin = unit(c(1,1,1,1), "cm"),
          axis.title.x = element_text(size=30, face="bold"),
          axis.title.y = element_text(size=30, face="bold"),
          axis.text.y = element_text(size=30, face="bold"))
  plot(p1)
}
##  [1] "PARP1"    "TYMS"     "CDC45"    "TIMELESS" "PPM1G"    "CTPS1"   
##  [7] "ZWINT"    "LIG1"     "KIF20A"   "EEF1A2"   "ATF5"     "UCHL1"   
## [13] "CCNB1"    "EEF1E1"   "CCNB2"    "TOP2A"    "RAD51C"   "PCNA"    
## [19] "URB2"     "SUGP2"    "CDC20"    "TXNRD1"   "CDC25B"   "FAM216A" 
## [25] "PARP1"    "TYMS"     "CDC45"    "TIMELESS" "PPM1G"    "CTPS1"   
## [31] "ZWINT"    "LIG1"     "KIF20A"   "EEF1A2"   "ATF5"     "UCHL1"   
## [37] "CCNB1"    "EEF1E1"   "CCNB2"    "TOP2A"    "RAD51C"   "PCNA"    
## [43] "URB2"     "SUGP2"    "CDC20"    "TXNRD1"   "CDC25B"   "FAM216A" 
## [1] "IWR-1-ENDO"
## 
##  Fisher's Exact Test for Count Data
## 
## data:  dat
## p-value = 0.0967
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  0.9521115 1.5992295
## sample estimates:
## odds ratio 
##   1.241408

##   [1] "FEN1"    "MCM6"    "PARP1"   "MCM2"    "STMN1"   "FANCI"   "BUB1B"  
##   [8] "TYMS"    "RRM2"    "TRIP13"  "DEPDC1"  "CDC25A"  "LPCAT1"  "BUB1"   
##  [15] "LBR"     "AURKA"   "ZWINT"   "DLGAP5"  "HJURP"   "GINS1"   "LIG1"   
##  [22] "H2AFX"   "MCM10"   "MKI67"   "KIF4A"   "UBE2C"   "KIF20A"  "GTSE1"  
##  [29] "DDX39A"  "EEF1A2"  "ATF5"    "MCM4"    "CCNB1"   "BIRC5"   "CCNB2"  
##  [36] "KIF18B"  "TOP2A"   "PRC1"    "CDCA8"   "NARF"    "OIP5"    "CCNA2"  
##  [43] "PTTG1"   "HMMR"    "NCAPH"   "LMNB2"   "TACC3"   "PCNA"    "PSRC1"  
##  [50] "ZNF324"  "CHAF1A"  "SNRNP25" "ASPM"    "MCM5"    "CDC20"   "TPX2"   
##  [57] "TTK"     "FEN1"    "MCM6"    "PARP1"   "MCM2"    "STMN1"   "FANCI"  
##  [64] "BUB1B"   "TYMS"    "RRM2"    "TRIP13"  "DEPDC1"  "CDC25A"  "LPCAT1" 
##  [71] "BUB1"    "LBR"     "AURKA"   "ZWINT"   "DLGAP5"  "HJURP"   "GINS1"  
##  [78] "LIG1"    "H2AFX"   "MCM10"   "MKI67"   "KIF4A"   "UBE2C"   "KIF20A" 
##  [85] "GTSE1"   "DDX39A"  "EEF1A2"  "ATF5"    "MCM4"    "CCNB1"   "BIRC5"  
##  [92] "CCNB2"   "KIF18B"  "TOP2A"   "PRC1"    "CDCA8"   "NARF"    "OIP5"   
##  [99] "CCNA2"   "PTTG1"   "HMMR"    "NCAPH"   "LMNB2"   "TACC3"   "PCNA"   
## [106] "PSRC1"   "ZNF324"  "CHAF1A"  "SNRNP25" "ASPM"    "MCM5"    "CDC20"  
## [113] "TPX2"    "TTK"    
## [1] "PRI-724"
## 
##  Fisher's Exact Test for Count Data
## 
## data:  dat
## p-value = 4.077e-05
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  1.299362 2.082383
## sample estimates:
## odds ratio 
##   1.652257

##  [1] "COL9A2"   "CDKN2A"   "TYMS"     "MTHFD2"   "CDC45"    "DNAJC6"  
##  [7] "BRINP1"   "EEF1A2"   "CCNB1"    "DNMT3A"   "BCL2L11"  "CCNB2"   
## [13] "TTLL7"    "PHC2"     "PCNA"     "EIF4EBP1" "CDC20"    "PARD6A"  
## [19] "HDAC9"    "TOX"      "COL9A2"   "CDKN2A"   "TYMS"     "MTHFD2"  
## [25] "CDC45"    "DNAJC6"   "BRINP1"   "EEF1A2"   "CCNB1"    "DNMT3A"  
## [31] "BCL2L11"  "CCNB2"    "TTLL7"    "PHC2"     "PCNA"     "EIF4EBP1"
## [37] "CDC20"    "PARD6A"   "HDAC9"    "TOX"     
## [1] "XAV-939"
## 
##  Fisher's Exact Test for Count Data
## 
## data:  dat
## p-value = 0.0001013
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  1.272368 2.043034
## sample estimates:
## odds ratio 
##   1.619585

##  [1] "SOX2"     "COL9A2"   "PAFAH1B3" "STMN1"    "CDC45"    "CACNA2D2"
##  [7] "LBR"      "AURKA"    "LIG1"     "KIF20A"   "KIF14"    "BRINP1"  
## [13] "EEF1A2"   "SPAG5"    "CCNB1"    "UNC13A"   "CCNB2"    "TOP2A"   
## [19] "CHGA"     "SMC4"     "NARF"     "PCNA"     "PSRC1"    "KIF2C"   
## [25] "EZH2"     "CDC20"    "NT5DC3"   "YEATS2"   "SOX2"     "COL9A2"  
## [31] "PAFAH1B3" "STMN1"    "CDC45"    "CACNA2D2" "LBR"      "AURKA"   
## [37] "LIG1"     "KIF20A"   "KIF14"    "BRINP1"   "EEF1A2"   "SPAG5"   
## [43] "CCNB1"    "UNC13A"   "CCNB2"    "TOP2A"    "CHGA"     "SMC4"    
## [49] "NARF"     "PCNA"     "PSRC1"    "KIF2C"    "EZH2"     "CDC20"   
## [55] "NT5DC3"   "YEATS2"  
## [1] "indirubin"
## 
##  Fisher's Exact Test for Count Data
## 
## data:  dat
## p-value = 0.002835
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  1.132772 1.852121
## sample estimates:
## odds ratio 
##   1.455863

##  [1] "PARP1"    "UBE2S"    "PAFAH1B3" "CDC45"    "KIF20A"   "DTYMK"   
##  [7] "DDX39A"   "KIF14"    "EEF1A2"   "ATF5"     "CCNB1"    "CCNB2"   
## [13] "PCNA"     "EIF4EBP1" "CDC20"    "TXNRD1"   "CDC25B"   "PARP1"   
## [19] "UBE2S"    "PAFAH1B3" "CDC45"    "KIF20A"   "DTYMK"    "DDX39A"  
## [25] "KIF14"    "EEF1A2"   "ATF5"     "CCNB1"    "CCNB2"    "PCNA"    
## [31] "EIF4EBP1" "CDC20"    "TXNRD1"   "CDC25B"  
## [1] "mesalazine"
## 
##  Fisher's Exact Test for Count Data
## 
## data:  dat
## p-value = 0.6257
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  0.8019211 1.3939069
## sample estimates:
## odds ratio 
##   1.064963

##   [1] "FEN1"     "MCM6"     "PARP1"    "UBE2S"    "PAFAH1B3" "MCM2"    
##   [7] "STMN1"    "FANCI"    "NETO2"    "RNASEH2A" "TYMS"     "GINS2"   
##  [13] "CDC45"    "TIMELESS" "NCAPG"    "RRM2"     "ESPL1"    "TRIP13"  
##  [19] "NCAPG2"   "DEPDC1"   "NEK2"     "CDC25A"   "CTPS1"    "LPCAT1"  
##  [25] "FOXM1"    "BUB1"     "LBR"      "AURKA"    "ZWINT"    "DLGAP5"  
##  [31] "CDC7"     "HJURP"    "GINS1"    "LIG1"     "H2AFX"    "MCM10"   
##  [37] "MKI67"    "KIF4A"    "UBE2C"    "KIF20A"   "DTYMK"    "GTSE1"   
##  [43] "TRMT5"    "FBXO5"    "KIF14"    "CENPE"    "CKS2"     "CENPM"   
##  [49] "SPAG5"    "CCNB1"    "BIRC5"    "NDC80"    "CCNB2"    "MELK"    
##  [55] "KIF18B"   "TOP2A"    "MYBL2"    "ASF1B"    "PRC1"     "CDCA8"   
##  [61] "SMC4"     "XPO6"     "OIP5"     "CCNA2"    "NEIL3"    "PTTG1"   
##  [67] "POLE2"    "HMMR"     "NCAPH"    "TACC3"    "PCNA"     "PSRC1"   
##  [73] "KIF18A"   "KIF2C"    "SNRNP25"  "EZH2"     "ASPM"     "MCM5"    
##  [79] "CDC20"    "SPC25"    "TPX2"     "CDC25B"   "CEP55"    "TTK"     
##  [85] "FEN1"     "MCM6"     "PARP1"    "UBE2S"    "PAFAH1B3" "MCM2"    
##  [91] "STMN1"    "FANCI"    "NETO2"    "RNASEH2A" "TYMS"     "GINS2"   
##  [97] "CDC45"    "TIMELESS" "NCAPG"    "RRM2"     "ESPL1"    "TRIP13"  
## [103] "NCAPG2"   "DEPDC1"   "NEK2"     "CDC25A"   "CTPS1"    "LPCAT1"  
## [109] "FOXM1"    "BUB1"     "LBR"      "AURKA"    "ZWINT"    "DLGAP5"  
## [115] "CDC7"     "HJURP"    "GINS1"    "LIG1"     "H2AFX"    "MCM10"   
## [121] "MKI67"    "KIF4A"    "UBE2C"    "KIF20A"   "DTYMK"    "GTSE1"   
## [127] "TRMT5"    "FBXO5"    "KIF14"    "CENPE"    "CKS2"     "CENPM"   
## [133] "SPAG5"    "CCNB1"    "BIRC5"    "NDC80"    "CCNB2"    "MELK"    
## [139] "KIF18B"   "TOP2A"    "MYBL2"    "ASF1B"    "PRC1"     "CDCA8"   
## [145] "SMC4"     "XPO6"     "OIP5"     "CCNA2"    "NEIL3"    "PTTG1"   
## [151] "POLE2"    "HMMR"     "NCAPH"    "TACC3"    "PCNA"     "PSRC1"   
## [157] "KIF18A"   "KIF2C"    "SNRNP25"  "EZH2"     "ASPM"     "MCM5"    
## [163] "CDC20"    "SPC25"    "TPX2"     "CDC25B"   "CEP55"    "TTK"     
## [1] "pyrvinium"
## 
##  Fisher's Exact Test for Count Data
## 
## data:  dat
## p-value = 8.993e-09
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  1.571251 2.459727
## sample estimates:
## odds ratio 
##   1.973272

Performing pairwise intersection on overlapped genes in different Wnt-perturbing conditions

Organizing data

fset<-unique(unlist(annotate_genes))

#pairwise intersection
conditions<-unlist(lapply(RDS_FILES, function(x) strsplit(strsplit(x, "/")[[1]][10], "_")[[1]][1]))
combs<-combn(length(conditions), 2)
summary<-data.frame()
for (i in 1:ncol(combs)){
  pair_id<-combs[,i]
  set1<-annotate_genes[[pair_id[1]]]
  set2<-annotate_genes[[pair_id[2]]]
  set1id<-conditions[pair_id[1]]
  set2id<-conditions[pair_id[2]]
  int<-intersect(set1, set2)
  df<-data.frame(set1 = set1id, set2 = set2id, intersection = paste(int, collapse = "/"), gene_count = length(int))
  #df<-data.frame(set1 = set1id, set2 = set2id, intersection = int, gene_count = length(int))
  summary<-rbind(summary, df)
}
summary$set1<-as.factor(summary$set1)
summary$set2<-as.factor(summary$set2)

Generating circos plot from the pariwise intersection

### Make data
colors<-brewer.pal(n = length(conditions), "Set2")
df1_new<-data.frame(order = 1:length(conditions), 
                    conditions = conditions,
                    rcol = colors,
                    lcol = paste0(colors, "65"),
                    xmin = rep(0, length(conditions)),
                    xmax = unlist(lapply(annotate_genes, function(x) length(x))))
                    
### Plot sectors (outer part)
par(mar=rep(0,4))
circos.clear()
 
### Basic circos graphic parameters
circos.par(cell.padding=c(0,0,0,0), track.margin=c(0,0.15), start.degree = 90, gap.degree = 4) 
 
### Sector details
circos.initialize(factors = df1_new$conditions, xlim = cbind(df1_new$xmin, df1_new$xmax)) 
 
### Plot sectors
circos.trackPlotRegion(ylim = c(0, 1), factors = df1_new$conditions, track.height=0.1,
                      #panel.fun for each sector
                      panel.fun = function(x, y) {
                      #select details of current sector
                      name = get.cell.meta.data("sector.index")
                      i = get.cell.meta.data("sector.numeric.index")
                      xlim = get.cell.meta.data("xlim")
                      ylim = get.cell.meta.data("ylim")
 
                      #text direction (dd) and adjusmtents (aa)
                      theta = circlize(mean(xlim), 1.3)[1, 1] %% 360
                      dd <- ifelse(theta < 90 || theta > 270, "clockwise", "reverse.clockwise")
                      aa = c(1, 0.5)
                      if(theta < 90 || theta > 270)  aa = c(0, 0.5)
 
                      #plot conditions labels
                      circos.text(x=mean(xlim), y=1.7, labels=name, facing = dd, cex=1.7, font=2, adj = aa)
 
                      #plot main sector
                      circos.rect(xleft=xlim[1], ybottom=ylim[1], xright=xlim[2], ytop=ylim[2], 
                                  col = df1_new$rcol[i], border=df1_new$rcol[i])
 
                      #plot axis
                      circos.axis(labels.cex=1.5, direction = "outside", major.at=seq(from=0,to=floor(df1_new$xmax)[i], by = floor(df1_new$xmax)[i])
                                  ,minor.ticks=0
                                  )
                    })

### Plot links (inner part)
### Add sum values to df1, marking the x-position of the first links
### out (sum1) and in (sum2). Updated for further links in loop below.
df1_new$sum1 <- df1_new$xmax
df1_new$sum2 <- numeric(nrow(df1_new))
df1_new$genes <- "/NA"
#df1_new$conditions <- factor(df1_new$conditions, levels = df1_new$conditions)
### Create a data.frame of the flow matrix sorted by flow size, to allow largest flow plotted first
df2_new<-summary[, c("set1", "set2", "gene_count", "intersection")]
df2_new<-arrange(df2_new, desc(gene_count))

### Plot links
for(k in 1:nrow(df2_new)){
    #i,j reference of flow matrix
    i<-match(df2_new$set1[k],df1_new$conditions)
    j<-match(df2_new$set2[k],df1_new$conditions)
    genes<-strsplit(subset(df2_new, set1 == df1_new$conditions[i] & set2 == df1_new$conditions[j])[,"intersection"], "/")[[1]]
#plot link
circos.link(sector.index1=df1_new$conditions[i], 
            point1=c(df1_new$sum2[i]-length(intersect(genes, strsplit(df1_new$genes[i], "/")[[1]])), df1_new$sum2[i] + df2_new$gene_count[k]),
            sector.index2=df1_new$conditions[j],
            point2=c(df1_new$sum2[j]-length(intersect(genes, strsplit(df1_new$genes[j], "/")[[1]])), df1_new$sum2[j] + df2_new$gene_count[k]), 
            col = df1_new$lcol[i], rou1 = 0.72, rou2 = 0.72)

df1_new$sum2[i]<-df1_new$sum2[i] + df2_new$gene_count[k] - length(intersect(genes, strsplit(df1_new$genes[i], "/")[[1]]))
df1_new$sum2[j]<-df1_new$sum2[j] + df2_new$gene_count[k] - length(intersect(genes, strsplit(df1_new$genes[j], "/")[[1]]))

df1_new$genes[i]<-paste(unique(c(strsplit(df1_new$genes[i], "/")[[1]], genes)), collapse = "/")
df1_new$genes[j]<-paste(unique(c(strsplit(df1_new$genes[j], "/")[[1]], genes)), collapse = "/")
}

back to top

back to home